// ----------------------------------------------------------------------------
// Second stage boot code
// Copyright (c) 2019-2021 Raspberry Pi (Trading) Ltd.
// SPDX-License-Identifier: BSD-3-Clause
//
// Device:      Winbond W25X10CL
//
// Description: Configures W25X10CL to run in Dual I/O continuous read XIP mode
//
// Details:     * Disable SSI
//              * Configure SSI to generate 8b command + 28b address + 2 wait,
//                with address and data using dual SPI mode
//              * Enable SSI
//              * Generate dummy read with command = 0xBB, top 24b of address
//                of 0x000000 followed by M[7:0]=0010zzzz (with the HiZ being
//                generated by 2 wait cycles).  This leaves the W25X10CL in
//                continuous read mode
//              * Disable SSI
//              * Configure SSI to generate 0b command + 28b address + 2 wait,
//                with the extra 4 bits of address LSB being 0x2 to keep the
//                W25X10CL in continuous read mode forever
//              * Enable SSI
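//              * Set VTOR = 0x10000100
//              * Read MSP reset vector from 0x10000100 and write to MSP (this
//                will also enable XIP mode in the SSI wrapper)
//              * Read PC reset vector from 0x10000104 and jump to it
//                (The three steps above apply when entered from the bootrom
//                with lr == 0. When called from user code with a non-zero lr,
//                the routine is expected to return to its caller instead.)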
//
// Building:    * This code must be linked to run at 0x20000000
//              * The code will be padded to a size of 256 bytes, including a
//                4-byte checksum. Therefore code size cannot exceed 252 bytes.
// ----------------------------------------------------------------------------

#include "pico/asm_helper.S"
#include "hardware/regs/addressmap.h"
#include "hardware/regs/ssi.h"

// The serial flash interface will run at clk_sys/PICO_FLASH_SPI_CLKDIV.
// This must be an even number. It is loaded with a "movs" immediate and
// written to the SSI baud rate register, so it must also fit in 8 bits.
// A divider of 0 is rejected as well: the SSI documentation describes a zero
// divider as disabling the serial clock, which would leave the boot code
// waiting forever for the dummy read to complete.
// Bad values are caught here at build time instead of producing a silently
// different clock rate or an obscure assembler error.
#ifndef PICO_FLASH_SPI_CLKDIV
#define PICO_FLASH_SPI_CLKDIV 4
#endif
#if (PICO_FLASH_SPI_CLKDIV) & 1
#error PICO_FLASH_SPI_CLKDIV must be even
#endif
#if (PICO_FLASH_SPI_CLKDIV) < 2 || (PICO_FLASH_SPI_CLKDIV) > 254
#error PICO_FLASH_SPI_CLKDIV must be in the range 2..254
#endif

// Assembler setup macro, invoked before any code is emitted.
// NOTE(review): presumably defined in pico/asm_helper.S (included above) and
// selects the assembler syntax / target CPU - confirm against that header.
pico_default_asm_setup

// ----------------------------------------------------------------------------
// The "System Control Block" is a set of internal Cortex-M0+ control registers
// that are memory mapped and accessed like any other H/W register.  They have
// fixed addresses in the address map of every Cortex-M0+ system.
// ----------------------------------------------------------------------------

// Address of the Vector Table Offset Register.  This symbol is not referenced
// by any instruction visible in this file.
// NOTE(review): check whether the included exit routine uses it before
// removing it.
.equ SCB_VTOR,          0xE000ED08      // RW Vector Table Offset Register

// ----------------------------------------------------------------------------
// Winbond W25X10CL Supported Commands
// Taken from "w25x10cl_reg_021714.pdf"
// ----------------------------------------------------------------------------

// "Read Data Fast Dual I/O" opcode.  It is sent once, as the 8-bit instruction
// of the dummy read that puts the device into continuous read mode.
.equ W25X10CL_CMD_READ_DATA_FAST_DUAL_IO, 0xbb

// ----------------------------------------------------------------------------
// Winbond W25X10CL "Mode bits" are 8 special bits sent immediately after
// the address bits in a "Read Data Fast Dual I/O" command sequence.
// According to the datasheet, M[7:6] are reserved (set to zero) and M[3:0]
// are don't care (we HiZ them).  Only M[5:4] are used, and they must be set
// to M[5:4] = 2'b10 to enable continuous read mode.
// ----------------------------------------------------------------------------

// 0x20 = M[5:4] = 2'b10 with every other mode bit zero.  This value is placed
// in the XIP_CMD field of the final SPI_CTRLR0 setting; the dummy read
// hard-codes the equivalent top nibble (0x2) as the low 4 bits of its 28-bit
// address.
.equ W25X10CL_MODE_CONTINUOUS_READ,        0x20

// ----------------------------------------------------------------------------
// Start of 2nd Stage Boot Code
// ----------------------------------------------------------------------------

// Set the location counter of the current section to offset 0.  Note that
// this directive comes before the ".section" switch below.
.org 0

// Everything from here on is assembled into the .text section.
// NOTE(review): the run address (0x20000000 per the file header) is set by
// the linker script, not by anything in this file.
.section .text

// ----------------------------------------------------------------------------
// _stage2_boot: second stage boot entry point.
//
// Sequence performed by the code visible here:
//   1. Disable the SSI, set the SPI clock divider and frame format.
//   2. Issue one dummy "Fast Read Dual I/O" (0xBB) read whose mode bits put
//      the flash into continuous read mode, and wait for it to complete.
//   3. Disable the SSI, reprogram SPI_CTRLR0 so that reads send no command,
//      only address + mode bits, then re-enable the SSI.
//   4. Fall into the shared exit routine included at the bottom.
//
// lr will be zero on entry if entered from the bootrom, and the boot_stage2 is expected
// to continue into the binary via the vector table at 0x10000100.
//
// lr will be non-zero on entry if this code has been copied into RAM by user code and called
// from there, and the boot_stage2 should just return normally.
//
// Register use in the code visible here: r3 holds the SSI base address for the
// whole routine; r0 and r1 are temporaries.  r2 and the other GPRs are not
// touched.
// NOTE(review): the included exit routine is not visible in this file -
// check its register use separately.
// ----------------------------------------------------------------------------
regular_func _stage2_boot
    // Save the entry lr on the stack.  NOTE(review): presumably popped by the
    // included exit routine to choose between the two entry cases - confirm.
    push {lr}
    ldr r3, =XIP_SSI_BASE                // Use as base address where possible

// We are primarily interested in setting up Flash for DSPI XIP w/ continuous read

    movs r1, #0
    str r1, [r3, #SSI_SSIENR_OFFSET] // Disable SSI to allow further config

// The Boot ROM sets a very conservative SPI clock frequency to be sure it can
// read the initial 256 bytes from any device.  Here we can be more aggressive.

    movs r1, #PICO_FLASH_SPI_CLKDIV
    str r1, [r3, #SSI_BAUDR_OFFSET]  // Set SSI Clock

// First we need to send the initial command to get us in to Fast Read Dual I/O
// mode.  As this transaction requires a command, we can't send it in XIP mode.
// To enter Continuous Read mode as well we need to append 4'b0010 to the address
// bits and then add a further 4 don't care bits.  We will construct this by
// specifying a 28-bit address, with the least significant bits being 4'b0010.
// This is just a dummy transaction so we'll perform a read from address zero
// and then discard what comes back.  All we really care about is that at the
// end of the transaction, the Winbond W25X10CL device is in Continuous Read mode
// and from then on will only expect to receive addresses.

// CTRLR0 value: dual frame format, 32-bit data frames, "EEPROM read" transfer
// mode.  CTRLR0 is written only once; the final XIP configuration further
// down changes SPI_CTRLR0 only.
#define CTRLR0_ENTER_XIP \
    (SSI_CTRLR0_SPI_FRF_VALUE_DUAL         /* Dual I/O mode */                \
        << SSI_CTRLR0_SPI_FRF_LSB) |                                          \
    (31 << SSI_CTRLR0_DFS_32_LSB)  |       /* 32 data bits */    \
    (SSI_CTRLR0_TMOD_VALUE_EEPROM_READ     /* Send INST/ADDR, Receive Data */ \
        << SSI_CTRLR0_TMOD_LSB)

    ldr r1, =(CTRLR0_ENTER_XIP)
    str r1, [r3, #SSI_CTRLR0_OFFSET]

    movs r1, #0x0                   // NDF=0 (single 32b read)
    str r1, [r3, #SSI_CTRLR1_OFFSET]

// SPI_CTRLR0 value for the dummy read: 8-bit instruction sent serially,
// followed by a 28-bit address phase in dual mode and 2 wait cycles.
#define SPI_CTRLR0_ENTER_XIP \
    (7 << SSI_SPI_CTRLR0_ADDR_L_LSB) |         /* Send 28 bits (24 address + 4 mode) */ \
    (2 << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) |    /* Hi-Z the other 4 mode bits (2 cycles @ dual I/O = 4 bits) */ \
    (SSI_SPI_CTRLR0_INST_L_VALUE_8B \
        << SSI_SPI_CTRLR0_INST_L_LSB) |        /* 8-bit instruction */ \
    (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A      /* Send Command in serial mode then address in Dual I/O mode */ \
        << SSI_SPI_CTRLR0_TRANS_TYPE_LSB)

// SPI_CTRLR0 is written through its full address in r0 rather than as an
// offset from r3.  NOTE(review): presumably its offset is too large for the
// immediate-offset form of "str" - confirm against hardware/regs/ssi.h.
    ldr r1, =(SPI_CTRLR0_ENTER_XIP)
    ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET)  // SPI_CTRL0 Register
    str r1, [r0]

    movs r1, #1                     // Re-enable SSI
    str r1, [r3, #SSI_SSIENR_OFFSET]

// Two pushes to the data register make up the dummy read: first the command,
// then the 28-bit address.  The low nibble of the address (0x2) is the top
// nibble of W25X10CL_MODE_CONTINUOUS_READ (0x20), i.e. mode bits M[7:4].
    movs r1, #W25X10CL_CMD_READ_DATA_FAST_DUAL_IO   // 8b command = 0xBB
    str r1, [r3, #SSI_DR0_OFFSET]   // Push SPI command into TX FIFO
    movs r1, #0x0000002             // 28-bit Address for dummy read = 0x000000 + 0x2 Mode bits to set M[5:4]=10
    str r1, [r3, #SSI_DR0_OFFSET]   // Push Address into TX FIFO - this will trigger the transaction

// Now we wait for the read transaction to complete by monitoring the SSI
// status register and checking for the "RX FIFO Not Empty" flag to assert.
// The received word itself is never read out; the SSI is disabled straight
// after the loop.  There is no timeout on this poll.

    movs r1, #SSI_SR_RFNE_BITS      // r1 = mask for the RFNE flag, loaded once outside the loop
00:
    ldr r0, [r3, #SSI_SR_OFFSET]    // Read status register
    tst r0, r1                      // RFNE status flag set?
    beq 00b                         // If not then wait

// At this point chip select (CS#) will be deasserted and the SPI clock will
// not be running.
// The Winbond W25X10CL device will be in continuous read, dual I/O mode and
// only expecting address bits after the next CS# assertion.  So long as we
// send 4'b0010 (and 4 more dummy HiZ bits) after every subsequent 24b address
// then the Winbond device will remain in continuous read mode.  This is the
// ideal mode for Execute-In-Place.
// (If we want to exit continuous read mode then we will need to switch back
// to APM mode and generate a 28-bit address phase with the extra nibble set
// to 4'b0000).

    movs r1, #0
    str r1, [r3, #SSI_SSIENR_OFFSET]   // Disable SSI (and clear FIFO) to allow further config

// Note that the INST_L field is used to select what XIP data gets pushed into
// the TX FIFO:
//      INST_L_0_BITS   {ADDR[23:0],XIP_CMD[7:0]}       Load "mode bits" into XIP_CMD
//      Anything else   {XIP_CMD[7:0],ADDR[23:0]}       Load SPI command into XIP_CMD

// Final SPI_CTRLR0 value.  Compared with SPI_CTRLR0_ENTER_XIP it keeps the
// same address length and wait cycles, but sets the instruction length to
// none, places the mode bits in XIP_CMD and uses the 2C2A transfer type.
#define SPI_CTRLR0_XIP \
    (W25X10CL_MODE_CONTINUOUS_READ              /* Mode bits to keep Winbond in continuous read mode */ \
        << SSI_SPI_CTRLR0_XIP_CMD_LSB) | \
    (7 << SSI_SPI_CTRLR0_ADDR_L_LSB) |         /* Send 28 bits (24 address + 4 mode) */ \
    (2 << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) |    /* Hi-Z the other 4 mode bits (2 cycles @ dual I/O = 4 bits) */ \
    (SSI_SPI_CTRLR0_INST_L_VALUE_NONE          /* Do not send a command, instead send XIP_CMD as mode bits after address */ \
        << SSI_SPI_CTRLR0_INST_L_LSB) | \
    (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A      /* Send Address in Dual I/O mode (and Command but that is zero bits long) */ \
        << SSI_SPI_CTRLR0_TRANS_TYPE_LSB)

    ldr r1, =(SPI_CTRLR0_XIP)
    ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET)  // Same full-address access as for the first SPI_CTRLR0 write
    str r1, [r0]

    movs r1, #1
    str r1, [r3, #SSI_SSIENR_OFFSET]   // Re-enable SSI

// We are now in XIP mode, with all transactions using Dual I/O and only
// needing to send 24-bit addresses (plus mode bits) for each read transaction.

// Pull in standard exit routine
// Execution falls straight through into the included code; there is no
// return instruction in this file.
#include "boot2_helpers/exit_from_boot2.S"

// Literal pool.  ".ltorg" makes the assembler emit here the constants loaded
// by the "ldr rX, =value" pseudo-instructions in the code above.  The start of
// the pool is exported as the global symbol "literals".
.global literals
literals:
.ltorg

// End of the assembly source; the assembler ignores anything after ".end".
.end
